ngl: add front cache to reduce uniform changes
author Christian Hergert <chergert@redhat.com>
Fri, 12 Mar 2021 18:16:52 +0000 (10:16 -0800)
committer Christian Hergert <chergert@redhat.com>
Fri, 12 Mar 2021 19:22:59 +0000 (11:22 -0800)
Since we make full snapshots when recording uniform state of batches, we
need to perform some deduplication to avoid so many repeated uniform calls.

This uses a closed hashtable to determine if we are likely changing the
value to something new.

This does not currently compare values; it only checks whether we are
about to point at a new offset into the uniform buffer. We could go
further by comparing values when they are updated (we did that early on
in the prototype) so that offsets are less likely to change.

gsk/ngl/gsknglcommandqueue.c
gsk/ngl/gskngluniformstate.c
gsk/ngl/gskngluniformstateprivate.h

index 9426c199b1468dc279d3c21a68abc911f39b8fc8..f0aaf1806f8198509d31938e4a202aef840c6d26 100644 (file)
@@ -731,98 +731,6 @@ gsk_ngl_command_queue_delete_program (GskNglCommandQueue *self,
   glDeleteProgram (program);
 }
 
-static inline void
-apply_uniform (gconstpointer     dataptr,
-               GskNglUniformInfo info,
-               guint             location)
-{
-  g_assert (dataptr != NULL);
-  g_assert (info.format > 0);
-  g_assert (location < GL_MAX_UNIFORM_LOCATIONS);
-
-  switch (info.format)
-    {
-    case GSK_NGL_UNIFORM_FORMAT_1F:
-      glUniform1fv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_2F:
-      glUniform2fv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_3F:
-      glUniform3fv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_4F:
-      glUniform4fv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_1FV:
-      glUniform1fv (location, info.array_count, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_2FV:
-      glUniform2fv (location, info.array_count, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_3FV:
-      glUniform3fv (location, info.array_count, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_4FV:
-      glUniform4fv (location, info.array_count, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_1I:
-    case GSK_NGL_UNIFORM_FORMAT_TEXTURE:
-      glUniform1iv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_2I:
-      glUniform2iv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_3I:
-      glUniform3iv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_4I:
-      glUniform4iv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_1UI:
-      glUniform1uiv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_MATRIX: {
-      float mat[16];
-      graphene_matrix_to_float (dataptr, mat);
-      glUniformMatrix4fv (location, 1, GL_FALSE, mat);
-#if 0
-      /* TODO: If Graphene can give us a peek at the matrix storage on
-       * platforms where the format is already float[16] (most/all
-       * x86_64?) then we can avoid the graphene_matrix_to_float()
-       * conversion above and pass dataptr to GL directly, as the
-       * disabled code below does.
-       */
-      G_STATIC_ASSERT (sizeof (graphene_matrix_t) == 16*4);
-      glUniformMatrix4fv (location, 1, GL_FALSE, dataptr);
-#endif
-    }
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_COLOR:
-      glUniform4fv (location, 1, dataptr);
-    break;
-
-    case GSK_NGL_UNIFORM_FORMAT_ROUNDED_RECT:
-      glUniform4fv (location, 3, dataptr);
-    break;
-
-    default:
-      g_assert_not_reached ();
-    }
-}
-
 static inline void
 apply_viewport (guint *current_width,
                 guint *current_height,
@@ -1187,8 +1095,7 @@ gsk_ngl_command_queue_execute (GskNglCommandQueue   *self,
               const GskNglCommandUniform *u = &self->batch_uniforms.items[batch->draw.uniform_offset];
 
               for (guint i = 0; i < batch->draw.uniform_count; i++, u++)
-                apply_uniform (gsk_ngl_uniform_state_get_uniform_data (self->uniforms, u->info.offset),
-                               u->info, u->location);
+                gsk_ngl_uniform_state_apply (self->uniforms, program, u->location, u->info);
 
               n_uniforms += batch->draw.uniform_count;
             }
index 9b896e7d1a4057a67be563d6405363e5481be68e..c6fc9b5b67cb229f8cde261afe0464df1f044871 100644 (file)
@@ -65,6 +65,8 @@ gsk_ngl_uniform_state_new (void)
   state->values_pos = 0;
   state->values_buf = g_malloc (4096);
 
+  memset (state->apply_hash, 0, sizeof state->apply_hash);
+
   return g_steal_pointer (&state);
 }
 
@@ -221,6 +223,8 @@ gsk_ngl_uniform_state_end_frame (GskNglUniformState *state)
   state->values_pos = allocator;
 
   g_assert (allocator <= state->values_len);
+
+  memset (state->apply_hash, 0, sizeof state->apply_hash);
 }
 
 gsize
index 1385f93dac6783807846e757246d2a156d3a7256..6185087a6898b6b9c2e67b0d5db66beff987dfb6 100644 (file)
@@ -83,6 +83,7 @@ typedef struct _GskNglUniformState
   guint8 *values_buf;
   guint values_pos;
   guint values_len;
+  GskNglUniformInfo apply_hash[512];
 } GskNglUniformState;
 
 /**
@@ -680,6 +681,137 @@ gsk_ngl_uniform_state_set4fv (GskNglUniformState   *state,
     }
 }
 
+/*
+ * gsk_ngl_uniform_state_fmix:
+ * @program: the program id
+ * @location: the location of the uniform
+ *
+ * Packs @program (shifted into the upper 16 bits) and @location into a
+ * single word and scrambles it with alternating xor-shift and multiply
+ * steps. The shifts and multipliers are the ones used by the 32-bit
+ * MurmurHash3 finalizer.
+ *
+ * Returns: the mixed hash; callers reduce it to a table index themselves.
+ */
+static inline guint
+gsk_ngl_uniform_state_fmix (guint program,
+                            guint location)
+{
+  guint mixed = location | (program << 16);
+
+  mixed = (mixed ^ (mixed >> 16)) * 0x85ebca6b;
+  mixed = (mixed ^ (mixed >> 13)) * 0xc2b2ae35;
+
+  return mixed ^ (mixed >> 16);
+}
+
+/*
+ * gsk_ngl_uniform_state_apply:
+ * @state: the uniform state
+ * @program: the program id
+ * @location: the location of the uniform
+ * @offset: the offset of the data within the buffer
+ * @info: the uniform info
+ *
+ * This function can be used to apply state that was previously recorded
+ * by the #GskNglUniformState.
+ *
+ * It is specifically useful from the GskNglCommandQueue to execute uniform
+ * changes but only when they have changed from the current value.
+ */
+static inline void
+gsk_ngl_uniform_state_apply (GskNglUniformState *state,
+                             guint               program,
+                             guint               location,
+                             GskNglUniformInfo   info)
+{
+  guint index = gsk_ngl_uniform_state_fmix (program, location) % G_N_ELEMENTS (state->apply_hash);
+  gconstpointer dataptr = GSK_NGL_UNIFORM_VALUE (state->values_buf, info.offset);
+
+  /* aligned, can treat as unsigned */
+  if (*(guint *)&info == *(guint *)&state->apply_hash[index])
+    return;
+
+  state->apply_hash[index] = info;
+
+  /* TODO: We could do additional comparisons here to make sure we are
+   *       changing state.
+   */
+
+  switch (info.format)
+    {
+    case GSK_NGL_UNIFORM_FORMAT_1F:
+      glUniform1fv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_2F:
+      glUniform2fv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_3F:
+      glUniform3fv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_4F:
+      glUniform4fv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_1FV:
+      glUniform1fv (location, info.array_count, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_2FV:
+      glUniform2fv (location, info.array_count, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_3FV:
+      glUniform3fv (location, info.array_count, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_4FV:
+      glUniform4fv (location, info.array_count, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_1I:
+    case GSK_NGL_UNIFORM_FORMAT_TEXTURE:
+      glUniform1iv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_2I:
+      glUniform2iv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_3I:
+      glUniform3iv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_4I:
+      glUniform4iv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_1UI:
+      glUniform1uiv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_MATRIX: {
+      float mat[16];
+      graphene_matrix_to_float (dataptr, mat);
+      glUniformMatrix4fv (location, 1, GL_FALSE, mat);
+#if 0
+      /* TODO: If Graphene can give us a peek here on platforms
+       * where the format is float[16] (most/all x86_64?) then
+       * We can avoid the SIMD operation to convert the format.
+       */
+      G_STATIC_ASSERT (sizeof (graphene_matrix_t) == 16*4);
+      glUniformMatrix4fv (location, 1, GL_FALSE, dataptr);
+#endif
+    }
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_COLOR:
+      glUniform4fv (location, 1, dataptr);
+    break;
+
+    case GSK_NGL_UNIFORM_FORMAT_ROUNDED_RECT:
+      glUniform4fv (location, 3, dataptr);
+    break;
+
+    default:
+      g_assert_not_reached ();
+    }
+}
+
 G_END_DECLS
 
 #endif /* GSK_NGL_UNIFORM_STATE_PRIVATE_H */